In [ ]:
from sklearn.datasets import load_digits
from sklearn.linear_model import SGDClassifier
from sklearn.preprocessing import StandardScaler
# sklearn.cross_validation was removed in scikit-learn 0.20;
# train_test_split now lives in sklearn.model_selection.
from sklearn.model_selection import train_test_split

digits = load_digits()

# Fix the split for reproducibility (default is an unseeded 75/25 split).
X_train, X_test, y_train, y_test = train_test_split(
    digits.data, digits.target, random_state=42)

# Fit the scaler on the training data only, then apply the same transform
# to the test data, so no test-set statistics leak into preprocessing.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [ ]:
# `n_iter` was deprecated in scikit-learn 0.19 and removed in 0.21;
# `max_iter` (number of epochs over the training data) replaces it.
# `tol=None` disables early stopping so exactly 5 epochs are run,
# matching the old fixed-iteration `n_iter=5` behavior.
sgd = SGDClassifier(max_iter=5, tol=None, loss="hinge", penalty="l2")
sgd.fit(X_train_scaled, y_train)
print(sgd.score(X_test_scaled, y_test))

In [ ]:
# Incremental training: each partial_fit call performs one epoch over the
# given data. shuffle=False keeps repeated epochs deterministic.
sgd = SGDClassifier(shuffle=False)

# `classes` must be supplied on the first partial_fit call, since the model
# cannot assume a single batch contains every label.
sgd.partial_fit(X_train_scaled, y_train, classes=range(10))
print(sgd.score(X_test_scaled, y_test))

# Two more epochs over the same data (replaces the original copy-pasted
# cells); test accuracy should improve as the model converges.
for _ in range(2):
    sgd.partial_fit(X_train_scaled, y_train)
    print(sgd.score(X_test_scaled, y_test))

Exercise

Record the training and test loss after each of 10 epochs, once with a constant learning rate (`learning_rate="constant"`) and once with the `"invscaling"` schedule. Plot the resulting convergence curves and compare them. Then experiment with different initial learning rates (`eta0`).